package crawlerTools;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Utility class that parses an HTML string and evaluates XPath expressions against it.
 *
 * <p>Each call to {@link #xpath(String)} replaces the result of the previous call. A query
 * that matches exactly one element exposes that element's text through {@link #text()}; a
 * query that matches several elements exposes one {@code HtmlParse} per matched element
 * through {@link #getList()}.
 *
 * <p>Instances hold mutable query state, so a single instance should not be queried from
 * several threads at once without external coordination.
 *
 * @ClassName: HtmlParse
 * @Description: utility class for parsing HTML
 * @author: ganjanwen
 * @date: 2021/12/26 0:31
 * @Gitee: https://gitee.com/gan_jian_wen_main
 */
public class HtmlParse {
    /** Raw HTML this instance was created from; returned verbatim by {@link #toString()}. */
    private final String html;
    /** Parsed form of {@link #html}; every XPath query runs against this document. */
    private final Document document;
    /** Text of the matched element when the last query matched exactly one element, else null. */
    private String xpathValue;
    /** One wrapper per matched element when the last query matched several; never null. */
    private List<HtmlParse> xpathList = new ArrayList<HtmlParse>();


    /**
     * Parses the given HTML so that it can be queried with {@link #xpath(String)}.
     *
     * @param html the HTML source to parse
     */
    public HtmlParse(String html) {
        this.html = html;
        this.document = Jsoup.parse(html);
    }

    /**
     * Evaluates an XPath expression against the parsed document and stores the outcome.
     *
     * <ul>
     *   <li>No match: {@link #text()} yields an empty string and {@link #getList()} is empty.</li>
     *   <li>Exactly one match: {@link #text()} yields the text of that element.</li>
     *   <li>Several matches: {@link #getList()} holds one {@code HtmlParse} per element, each
     *       built from the element's inner HTML.</li>
     * </ul>
     *
     * @param xpath the XPath expression to evaluate
     * @return this instance, so that {@link #text()} or {@link #getList()} can be chained
     */
    public HtmlParse xpath(String xpath) {
        // Drop whatever the previous query left behind; otherwise text()/getList() could
        // report results that belong to an earlier expression.
        this.xpathValue = null;
        this.xpathList = new ArrayList<HtmlParse>();

        Elements elements = this.document.selectXpath(xpath);
        if (elements.size() == 1) {
            this.xpathValue = elements.get(0).text();
        } else if (elements.size() > 1) {
            List<HtmlParse> parseList = new ArrayList<HtmlParse>(elements.size());
            for (Element element : elements) {
                parseList.add(new HtmlParse(element.html()));
            }
            this.xpathList = parseList;
        }
        return this;
    }

    /**
     * Returns a textual view of the last {@link #xpath(String)} result.
     *
     * @return the matched element's text when exactly one element matched and its text is not
     *         empty; the string form of the match list (the HTML of each entry) when several
     *         elements matched; otherwise an empty string
     */
    public String text() {
        if (StringUtils.isNotEmpty(this.xpathValue)) {
            return this.xpathValue;
        }
        if (!this.xpathList.isEmpty()) {
            return this.xpathList.toString();
        }
        // Nothing matched (or the single match had no text): report "no text" instead of
        // dereferencing a missing list.
        return "";
    }

    /**
     * Returns the wrappers produced by the last {@link #xpath(String)} call.
     *
     * @return one {@code HtmlParse} per matched element when several elements matched;
     *         an empty list (never {@code null}) when zero or exactly one element matched
     */
    public List<HtmlParse> getList() {
        return this.xpathList;
    }

    /**
     * Returns the raw HTML this instance was created from.
     *
     * @return the HTML string passed to the constructor
     */
    @Override
    public String toString() {
        return this.html;
    }
}
